3  Results

Code
library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
Code
library(tidyr)
library(ggalluvial)
library(naniar)
library(ggmap)
library(lubridate)
library(sf)
library(dygraphs)
library(xts)
library(ggiraph)
Code
# Load the arrest records; the literal strings "(null)" and "N/A" are read
# as missing values.
data <- read.csv("NYPD_Arrest_Data.csv", na.strings = c("(null)", "N/A"))
# Keep only rows without any missing value.
data <- na.omit(data)
Code
### Data Preprocessing step
# Drop rows with any missing value (a no-op if they were already removed
# right after loading).
data <- na.omit(data)
# ARREST_DATE is parsed from month/day/year text into a Date.
data$ARREST_DATE <- as.Date(data$ARREST_DATE, format = "%m/%d/%Y")
# Replace the single-letter / numeric codes with readable labels.
data <- data |>
  mutate(
    # No default is given, so any other borough code becomes NA.
    ARREST_BORO = case_when(
      ARREST_BORO == "B" ~ "Bronx",
      ARREST_BORO == "S" ~ "Staten Island",
      ARREST_BORO == "K" ~ "Brooklyn",
      ARREST_BORO == "M" ~ "Manhattan",
      ARREST_BORO == "Q" ~ "Queens"
    ),
    # Any category other than F / M / V is explicitly set to NA.
    LAW_CAT_CD = case_when(
      LAW_CAT_CD == "F" ~ "Felony",
      LAW_CAT_CD == "M" ~ "Misdemeanor",
      LAW_CAT_CD == "V" ~ "Violation",
      .default = NA
    ),
    # Codes 0-2 get their own label; every other code is "Non NYPD".
    JURISDICTION_CODE = case_when(
      JURISDICTION_CODE == 0 ~ "Patrol",
      JURISDICTION_CODE == 1 ~ "Transit",
      JURISDICTION_CODE == 2 ~ "Housing",
      .default = "Non NYPD"
    )
  )
Code
# Count arrests per offense description and keep the 15 most frequent.
top_15_ofns_desc <- data |>
  group_by(OFNS_DESC) |>
  summarise(Count = n()) |>
  arrange(desc(Count)) |>
  head(15)

# Plot the top 15 offense descriptions by number of arrests
# (horizontal bars, ordered by count).
ggplot(top_15_ofns_desc, aes(x = reorder(OFNS_DESC, Count), y = Count)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "Top 15 Arrest Categories",
    x = "Offense Description",
    y = "Number of Arrests"
  ) + theme_minimal()

Code
# Count arrests per calendar day.
daily_data <- data |>
  group_by(ARREST_DATE) |>
  summarise(Count = n(), .groups = "drop")

# Line chart of the daily totals. `linewidth` replaces the `size` argument,
# which ggplot2 3.4.0 deprecated for lines.
# NOTE(review): no colour aesthetic is mapped in this plot, so the `color`
# label and the legend theme settings are presumably inert here.
ggplot(daily_data, aes(x = ARREST_DATE, y = Count)) +
  geom_line(linewidth = 0.8, alpha = 0.8) +
  labs(
    title = "Daily NYPD Arrests",
    subtitle = "From January 1st to September 30th",
    x = "Date",
    y = "Number of Arrests",
    color = "Borough"
  ) +
  scale_x_date(date_breaks = "2 week", date_labels = "%b %d") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10)
  )

Code
# Share of arrests per law category. Rows whose category is NA are dropped
# before the percentage is computed, so the shares sum to 100 over the
# remaining categories.
law_cat_cd_count <- data |>
  group_by(LAW_CAT_CD) |>
  summarize(Count = n()) |>
  arrange(desc(Count)) |>
  drop_na(LAW_CAT_CD) |>
  mutate(Percentage = (Count / sum(Count)) * 100)

# Bars ordered from the largest to the smallest share. The title names the
# percentage that is actually plotted on the y axis.
ggplot(law_cat_cd_count, aes(x = reorder(LAW_CAT_CD, -Percentage), y = Percentage)) +
  geom_bar(stat = "identity") +
  ggtitle("Percentage of Arrests by LAW_CAT_CD") +
  xlab("LAW_CAT_CD") +
  ylab("Percentage of Total Arrests (%)")

Code
# Add an abbreviated weekday label for every arrest date.
data <- data |>
  mutate(Weekday = wday(ARREST_DATE, label = TRUE, abbr = TRUE))

# Count arrests per date, carrying the weekday along for faceting.
weekday_summary <- data |>
  group_by(ARREST_DATE, Weekday) |>
  summarise(Count = n(), .groups = "drop")

# One facet row per weekday with a shared y scale; the area fill follows the
# arrest count. `linewidth` replaces the `size` argument that ggplot2 3.4.0
# deprecated for lines.
ggplot(weekday_summary, aes(x = ARREST_DATE, y = Count, group = Weekday, fill = Count)) +
  geom_area(alpha = 0.8) +
  geom_line(linewidth = 1, color = "black") +
  facet_grid(Weekday ~ ., scales = "fixed", switch = "y") +
  scale_fill_gradient(
    low = "lightblue",
    high = "darkblue",
    name = "Arrest Count"
  ) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  labs(
    title = "Daily NYPD Arrests by Weekday",
    x = "Date",
    y = "Number of Arrests"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    strip.text.y.left = element_text(angle = 0),
    strip.placement = "outside",
    legend.position = "right"
  )

Code
# Arrest counts per borough, largest first.
arrest_boro_count <- data |>
  group_by(ARREST_BORO) |>
  summarize(Count = n()) |>
  arrange(desc(Count))

ggplot(arrest_boro_count, aes(x = reorder(ARREST_BORO, -Count), y = Count)) +
  geom_bar(stat = "identity") +
  ggtitle("Number of Arrests by ARREST_BORO") +
  xlab("ARREST_BORO") +
  ylab("Number of Arrests")

Code
# Arrest counts per jurisdiction label, largest first.
jcode_count <- data |>
  group_by(JURISDICTION_CODE) |>
  summarize(Count = n()) |>
  arrange(desc(Count))

ggplot(jcode_count, aes(x = reorder(JURISDICTION_CODE, -Count), y = Count)) +
  geom_bar(stat = "identity") +
  ggtitle("Number of Arrests by JURISDICTION_CODE") +
  xlab("JURISDICTION_CODE") +
  ylab("Number of Arrests")

Code
# Arrest counts per age group. The x axis is not reordered by count here,
# unlike the neighbouring bar charts.
age_count <- data |>
  group_by(AGE_GROUP) |>
  summarize(Count = n()) |>
  arrange(desc(Count))

ggplot(age_count, aes(x = AGE_GROUP, y = Count)) +
  geom_bar(stat = "identity") +
  ggtitle("Number of Arrests by AGE_GROUP") +
  xlab("AGE_GROUP") +
  ylab("Number of Arrests")

Code
# Arrest counts per race, largest first; x labels are rotated 45 degrees.
race_count <- data |>
  group_by(PERP_RACE) |>
  summarize(Count = n()) |>
  arrange(desc(Count))

ggplot(race_count, aes(x = reorder(PERP_RACE, -Count), y = Count)) +
  geom_bar(stat = "identity") +
  ggtitle("Number of Arrests by PERP_RACE") +
  xlab("PERP_RACE") +
  ylab("Number of Arrests") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Code
# Counts for every sex / race / age group / crime type combination.
# "UNKNOWN" and "AMERICAN INDIAN/ALASKAN NATIVE" are pooled into "Others";
# all other race values are kept unchanged.
aggregated_data <- na.omit(data) |>
  mutate(PERP_RACE = case_when(
    PERP_RACE %in% c("UNKNOWN", "AMERICAN INDIAN/ALASKAN NATIVE") ~ "Others",
    .default = PERP_RACE
  )) |>
  group_by(PERP_SEX, PERP_RACE, AGE_GROUP, LAW_CAT_CD) |>
  summarise(Count = n(), .groups = "drop") |>
  rename(
    Sex = PERP_SEX,
    Race = PERP_RACE,
    AgeGroup = AGE_GROUP,
    CrimeType = LAW_CAT_CD
  )
aggregated_data
# A tibble: 160 × 5
   Sex   Race                     AgeGroup CrimeType   Count
   <chr> <chr>                    <chr>    <chr>       <int>
 1 F     ASIAN / PACIFIC ISLANDER 18-24    Felony         93
 2 F     ASIAN / PACIFIC ISLANDER 18-24    Misdemeanor   195
 3 F     ASIAN / PACIFIC ISLANDER 18-24    Violation       3
 4 F     ASIAN / PACIFIC ISLANDER 25-44    Felony        334
 5 F     ASIAN / PACIFIC ISLANDER 25-44    Misdemeanor   587
 6 F     ASIAN / PACIFIC ISLANDER 25-44    Violation      27
 7 F     ASIAN / PACIFIC ISLANDER 45-64    Felony        187
 8 F     ASIAN / PACIFIC ISLANDER 45-64    Misdemeanor   342
 9 F     ASIAN / PACIFIC ISLANDER 45-64    Violation       2
10 F     ASIAN / PACIFIC ISLANDER 65+      Felony         33
# ℹ 150 more rows
Code
# Alluvial plot across the four axes, with flows coloured by crime type.
ggplot(aggregated_data, aes(axis1 = Sex, axis2 = Race, axis3 = AgeGroup, axis4 = CrimeType, y = Count)) +
  geom_alluvium(aes(fill = CrimeType), width = 0.2, alpha = 0.8) +
  geom_stratum(width = 0.2, fill = "lightgrey", color = "black") +
  geom_text(stat = "stratum", aes(label = after_stat(stratum)), size = 3) +
  scale_x_discrete(limits = c("Sex", "Race", "AgeGroup", "CrimeType"), expand = c(0.1, 0.1)) +
  theme_minimal() +
  labs(
    title = "Alluvial Plot of Demographics and Crime Type Relationships",
    x = "Demographic and Crime Categories",
    y = "Number of Arrests",
    fill = "Crime Type"
  )

Code
# Same axes drawn with geom_flow (flows between adjacent axes) and no fill
# mapping.
ggplot(aggregated_data, aes(axis1 = Sex, axis2 = Race, axis3 = AgeGroup, axis4 = CrimeType, y = Count)) +
  geom_flow(color = "black") +
  geom_stratum(width = 0.2, fill = "lightgrey", color = "black") +
  geom_text(stat = "stratum", aes(label = after_stat(stratum)), size = 3) +
  scale_x_discrete(limits = c("Sex", "Race", "AgeGroup", "CrimeType"), expand = c(0.1, 0.1)) +
  theme_minimal() +
  labs(
    title = "Alluvial Plot of Demographics and Crime Type Relationships",
    x = "Demographic and Crime Categories",
    y = "Number of Arrests",
    fill = "Crime Type"
  )

Code
# Variant that removes the "UNKNOWN" and "AMERICAN INDIAN/ALASKAN NATIVE"
# rows entirely instead of pooling them.
aggregated_data_noother <- na.omit(data) |>
  filter(!(PERP_RACE %in% c("UNKNOWN", "AMERICAN INDIAN/ALASKAN NATIVE"))) |>
  group_by(PERP_SEX, PERP_RACE, AGE_GROUP, LAW_CAT_CD) |>
  summarise(Count = n(), .groups = "drop") |>
  rename(
    Sex = PERP_SEX,
    Race = PERP_RACE,
    AgeGroup = AGE_GROUP,
    CrimeType = LAW_CAT_CD
  )
ggplot(aggregated_data_noother, aes(axis1 = Sex, axis2 = Race, axis3 = AgeGroup, axis4 = CrimeType, y = Count)) +
  geom_flow(color = "black") +
  geom_stratum(width = 0.2, fill = "lightgrey", color = "black") +
  geom_text(stat = "stratum", aes(label = after_stat(stratum)), size = 3) +
  scale_x_discrete(limits = c("Sex", "Race", "AgeGroup", "CrimeType"), expand = c(0.1, 0.1)) +
  theme_minimal() +
  labs(
    title = "Alluvial Plot of Demographics and Crime Type Relationships",
    x = "Demographic and Crime Categories",
    y = "Number of Arrests",
    fill = "Crime Type"
  )

Code
# Daily arrest counts per borough.
daily_summary_by_borough <- data |>
  group_by(ARREST_DATE, ARREST_BORO) |>
  summarise(Count = n(), .groups = "drop")

# One column per borough; date/borough pairs without arrests are filled
# with 0.
wide_data <- tidyr::pivot_wider(daily_summary_by_borough,
                                names_from = ARREST_BORO,
                                values_from = Count,
                                values_fill = 0)

wide_data$ARREST_DATE <- as.Date(wide_data$ARREST_DATE)

# Time series object indexed by date; the first (date) column is excluded
# from the values.
time_series_obj <- xts(wide_data[, -1], order.by = wide_data$ARREST_DATE)

custom_colors <- c(
  "Bronx" = "#E7298A",
  "Brooklyn" = "#6495ED",
  "Manhattan" = "#E6AB02",
  "Queens" = "#66A61E",
  "Staten Island" = "#7570B3"
)

# Interactive chart with a range selector and a fixed 0-400 y axis.
# NOTE(review): the names of `custom_colors` are stripped before use, so the
# colours are presumably assigned by column position -- confirm that the
# column order of `time_series_obj` matches the order listed above.
dygraph(time_series_obj, main = "Daily NYPD Arrests") |>
  dyAxis("y", label = "Number of Arrests", valueRange = c(0, 400)) |>
  dyAxis("x", label = "Date") |>
  dyRangeSelector() |>
  dyLegend(width = 300, labelsSeparateLines = TRUE) |>
  dyOptions(colors = unname(custom_colors), strokeWidth = 2, gridLineColor = "#DDDDDD")
Code
# Static version of the per-borough daily series.
daily_summary_by_borough <- data |>
  group_by(ARREST_DATE, ARREST_BORO) |>
  summarise(Count = n(), .groups = "drop")

# `linewidth` replaces the `size` argument that ggplot2 3.4.0 deprecated
# for lines.
ggplot(daily_summary_by_borough, aes(x = ARREST_DATE, y = Count, color = ARREST_BORO)) +
  geom_line(linewidth = 0.8, alpha = 0.8) +
  labs(
    title = "Daily NYPD Arrests",
    subtitle = "From January 1st to September 30th",
    x = "Date",
    y = "Number of Arrests",
    color = "Borough"
  ) +
  scale_x_date(date_breaks = "2 week", date_labels = "%b %d") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10)
  )

Code
weekly_summary_by_borough <- data |>
  mutate(Week = floor_date(ARREST_DATE, unit = "week")) |>  # Create a 'Week' column
  group_by(Week, ARREST_BORO) |>                           # Group by week and borough
  summarise(Count = n(), .groups = "drop")
# Drop the latest week bucket (presumably because it is only partially
# covered by the data -- confirm).
max_week <- max(weekly_summary_by_borough$Week)
weekly_summary_filtered <- weekly_summary_by_borough |>
  filter(Week < max_week)

ggplot(weekly_summary_filtered, aes(x = Week, y = Count, color = ARREST_BORO)) +
  geom_line(linewidth = 1, alpha = 0.8) +
  scale_x_date(date_breaks = "2 week", date_labels = "%b %d") + # Weekly x-axis labels
  labs(
    title = "Weekly NYPD Arrests",
    subtitle = "From January 1st to September 30th",
    x = "Week",
    y = "Number of Arrests",
    color = "Borough"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.text.x = element_text(angle = 45, hjust = 1), # Rotate x-axis labels
    axis.title.x = element_text(size = 14),
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10)
  )

Code
# Borough outlines, read from a local GeoJSON file.
nyc_sf <- read_sf("new-york-city-boroughs.geojson")
# Discard records whose longitude or latitude is exactly 0. This overwrites
# `data`, so any later code sees the filtered rows only.
data <- data |>
  filter(Longitude != 0 & Latitude != 0)
arrest_sf <- st_as_sf(data, coords = c("Longitude", "Latitude"), crs = 4326)

# Arrest totals per borough, computed on the attribute table only.
borough_arrest_count <- arrest_sf |>
  st_drop_geometry() |>
  group_by(ARREST_BORO) |>
  summarise(total_arrests = n())

# Attach the totals to the borough polygons and build the hover text.
nyc_sf <- nyc_sf |>
  left_join(borough_arrest_count, by = c("name" = "ARREST_BORO")) |>
  mutate(tooltip = paste(name, "<br>Total Arrests:", total_arrests))

# Borough polygons (interactive, with tooltip) under one small red open
# circle per arrest. The polygon outline width is given as `linewidth`,
# the line-width aesthetic since ggplot2 3.4.0.
interactive_map <- ggplot() +
  geom_sf_interactive(
    data = nyc_sf,
    aes(fill = name, geometry = geometry, tooltip = tooltip),
    color = "black",
    linewidth = 0.3,
    alpha = 0.5
  ) +
  geom_sf(
    data = arrest_sf,
    aes(geometry = geometry),
    color = "red",
    size = 0.05,
    alpha = 0.4,
    stroke = 0.3,
    shape = 1
  ) +
  labs(
    title = "Arrest Locations in NYC",
    x = "Longitude",
    y = "Latitude",
    fill = "Borough"
  ) +
  coord_sf() +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 12, face = "italic"),
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10),
    plot.margin = margin(1, 1, 1, 1, "cm")
  )

girafe(ggobj = interactive_map)